import requests
import re

# Disabled code: everything between the triple quotes below is a bare string
# expression, so get_one_page is NOT defined and nothing here is executed.
# As written, the disabled function would build a cookie jar from the
# hard-coded cookie string, send a GET request with a browser User-Agent
# header, and return the response body on HTTP 200 (None otherwise).
# NOTE(review): cookies.split(';') leaves a leading space on every key after
# the first (the string is '; '-separated) -- strip the parts if this is ever
# re-enabled.
# NOTE(review): the literal embeds what look like real session cookie values;
# consider removing them from the source.
'''
def get_one_page(url):
    cookies = '__mta=49599897.1617803824974.1617803883061.1617804225707.4; uuid_n_v=v1; uuid=1BE9176097A911EBB5EB650B1E18C195391CC7736E9A496DBFCF23FE024BA88C; _csrf=db2b6c86cd03c59b8e44b7354a4ca8361c55e024a715a480b450d8ff9c6d96b6; Hm_lvt_703e94591e87be68cc8da0da7cbd0be2=1617803825; _lxsdk_cuid=178ac9f4e33c8-0d0db7013f037c-1b1f424f-2a3000-178ac9f4e3337; _lxsdk=1BE9176097A911EBB5EB650B1E18C195391CC7736E9A496DBFCF23FE024BA88C; Hm_lpvt_703e94591e87be68cc8da0da7cbd0be2=1617804226; _lxsdk_s=178ac9f4e34-070-1c8-1d2%7C%7C9'
    jar = requests.cookies.RequestsCookieJar()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X -1_0_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    for cookie in cookies.split(';'):
        key, value = cookie.split('=', 1)
        jar.set(key, value)
    response = requests.get(url, cookies=jar, headers=headers)
    if response.status_code == 200:
        return response.text
    return None
'''


def parse_one_page(html2):
    """Extract the captured fields of every ``<dd>...</dd>`` entry in *html2*.

    For each ``<dd>`` element the pattern captures, in order, the text that
    follows the ``board-index``, ``name``, ``star``, ``releasetime``,
    ``integer`` and ``fraction`` markers, plus the value of the ``data-src``
    attribute (second group).
    NOTE(review): presumably these are rank, image URL, title, cast, release
    date and the two halves of the score -- confirm against the page markup.

    Args:
        html2: HTML source text to scan.

    Returns:
        A list of 7-tuples of strings, one per matching ``<dd>`` element in
        document order; an empty list when nothing matches. The same list is
        also printed to stdout.
    """
    # Adjacent literals concatenate into a single pattern; each piece holds
    # exactly one capture group so the fields are easy to tell apart.
    # re.S lets '.' span newlines, since one entry covers several lines.
    pattern = re.compile(
        r'<dd>.*?board-index.*?>(.*?)</i>'
        r'.*?data-src="(.*?)"'
        r'.*?name.*?a.*?>(.*?)</a>'
        r'.*?star.*?>(.*?)</p>'
        r'.*?releasetime.*?>(.*?)</p>'
        r'.*?integer.*?>(.*?)</i>'
        r'.*?fraction.*?>(.*?)</i>'
        r'.*?</dd>',
        re.S,
    )
    items = pattern.findall(html2)
    print(items)
    # Hand the result back so callers can use it instead of only seeing it
    # on stdout.
    return items

def main():
    """Read the saved page ``4.html``, echo it, and run the parser on it.

    The file is opened as UTF-8 text from the current working directory.
    The live-fetch path (get_one_page on 'http://maoyan.com/board/4') is
    disabled in this file, so a saved copy of the page is parsed instead.

    Raises:
        OSError: if ``4.html`` cannot be opened (e.g. it does not exist).
    """
    # The context manager closes the file handle even if read() raises.
    with open('4.html', 'r', encoding='utf-8') as html_file:
        html = html_file.read()
    print(html)
    parse_one_page(html)


# Run only when executed as a script, so importing this module does not
# trigger file I/O and printing as a side effect.
if __name__ == "__main__":
    main()
